library(mosaic)
library(tidyverse)
library(lubridate)
library(DataComputing)
library(rvest)
How do the different indicators used to determine how developed a country is (such as birth rate, population, death rate, etc.) manifest themselves in the spread of COVID-19? Essentially, how does the impact of COVID-19 differ across countries?
Reading in the Data:
Data Source 1: COVID
# Load the raw COVID cases/deaths-per-million CSV from the working directory.
COVID <- read.csv("total-covid-cases-deaths-per-million.csv")
# Show the raw table, then its row count.
COVID
nrow(COVID)
[1] 9487
# List the column names of the raw COVID table.
names(COVID)
[1] "total.covid.cases.deaths.per.million" "X" "X.1"
[4] "X.2" "X.3" "X.4"
[7] "X.5" "X.6" "X.7"
[10] "X.8" "X.9" "X.10"
[13] "X.11" "X.12" "X.13"
[16] "X.14" "X.15" "X.16"
[19] "X.17" "X.18" "X.19"
[22] "X.20" "X.21" "X.22"
[25] "X.23" "X.24" "X.25"
[28] "X.26" "X.27" "X.28"
[31] "X.29" "X.30" "X.31"
[34] "X.32" "X.33" "X.34"
[37] "X.35" "X.36" "X.37"
[40] "X.38" "X.39" "X.40"
[43] "X.41" "X.42" "X.43"
[46] "X.44" "X.45" "X.46"
[49] "X.47" "X.48" "X.49"
[52] "X.50" "X.51" "X.52"
[55] "X.53" "X.54" "X.55"
[58] "X.56" "X.57" "X.58"
[61] "X.59" "X.60" "X.61"
[64] "X.62" "X.63" "X.64"
[67] "X.65" "X.66" "X.67"
[70] "X.68" "X.69" "X.70"
[73] "X.71" "X.72" "X.73"
[76] "X.74" "X.75" "X.76"
[79] "X.77" "X.78" "X.79"
[82] "X.80" "X.81" "X.82"
[85] "X.83" "X.84" "X.85"
[88] "X.86" "X.87" "X.88"
[91] "X.89" "X.90" "X.91"
[94] "X.92" "X.93" "X.94"
[97] "X.95" "X.96" "X.97"
[100] "X.98" "X.99" "X.100"
[103] "X.101" "X.102" "X.103"
[106] "X.104" "X.105" "X.106"
[109] "X.107" "X.108" "X.109"
[112] "X.110" "X.111" "X.112"
[115] "X.113" "X.114" "X.115"
[118] "X.116" "X.117" "X.118"
[121] "X.119" "X.120" "X.121"
[124] "X.122" "X.123" "X.124"
[127] "X.125" "X.126" "X.127"
[130] "X.128" "X.129" "X.130"
[133] "X.131" "X.132" "X.133"
[136] "X.134" "X.135" "X.136"
[139] "X.137" "X.138" "X.139"
[142] "X.140" "X.141" "X.142"
[145] "X.143" "X.144" "X.145"
[148] "X.146" "X.147" "X.148"
[151] "X.149" "X.150" "X.151"
[154] "X.152" "X.153" "X.154"
[157] "X.155" "X.156" "X.157"
[160] "X.158" "X.159" "X.160"
[163] "X.161" "X.162" "X.163"
[166] "X.164" "X.165" "X.166"
[169] "X.167" "X.168" "X.169"
[172] "X.170" "X.171" "X.172"
[175] "X.173" "X.174" "X.175"
[178] "X.176" "X.177" "X.178"
[181] "X.179" "X.180" "X.181"
[184] "X.182" "X.183" "X.184"
[187] "X.185" "X.186" "X.187"
[190] "X.188" "X.189" "X.190"
[193] "X.191" "X.192" "X.193"
[196] "X.194" "X.195" "X.196"
[199] "X.197" "X.198" "X.199"
[202] "X.200" "X.201" "X.202"
[205] "X.203" "X.204" "X.205"
[208] "X.206" "X.207" "X.208"
[211] "X.209" "X.210" "X.211"
[214] "X.212" "X.213" "X.214"
[217] "X.215" "X.216" "X.217"
[220] "X.218" "X.219" "X.220"
[223] "X.221" "X.222" "X.223"
[226] "X.224" "X.225" "X.226"
[229] "X.227" "X.228" "X.229"
[232] "X.230" "X.231" "X.232"
[235] "X.233" "X.234" "X.235"
[238] "X.236" "X.237" "X.238"
[241] "X.239" "X.240" "X.241"
[244] "X.242" "X.243" "X.244"
[247] "X.245" "X.246" "X.247"
[250] "X.248" "X.249" "X.250"
[253] "X.251" "X.252" "X.253"
[256] "X.254"
# Preview the first rows of the raw COVID table.
head(COVID)
Data Source 2: CountryData
# Show the CountryData table, then its row count.
# NOTE(review): CountryData is not created in this file; presumably it is
# supplied by one of the packages loaded above -- confirm.
CountryData
nrow(CountryData)
[1] 256
# List the column names of CountryData.
names(CountryData)
[1] "country" "area" "pop" "growth" "birth" "death"
[7] "migr" "maternal" "infant" "life" "fert" "health"
[13] "HIVrate" "HIVpeople" "HIVdeath" "obesity" "underweight" "educ"
[19] "unemploymentYouth" "GDP" "GDPgrowth" "GDPcapita" "saving" "indProd"
[25] "labor" "unemployment" "family" "tax" "budget" "debt"
[31] "inflation" "discount" "lending" "narrow" "broad" "credit"
[37] "shares" "balance" "exports" "imports" "gold" "externalDebt"
[43] "homeStock" "abroadStock" "elecProd" "elecCons" "elecExp" "elecImp"
[49] "elecCap" "elecFossil" "elecNuc" "elecHydro" "elecRenew" "oilProd"
[55] "oilExp" "oilImp" "oilRes" "petroProd" "petroCons" "petroExp"
[61] "petroImp" "gasProd" "gasCons" "gasExp" "gasImp" "gasRes"
[67] "mainlines" "cell" "netHosts" "netUsers" "airports" "railways"
[73] "roadways" "waterways" "marine" "military"
# Preview the first rows of CountryData.
head(CountryData)
Data Source 3: Continents
# Load the country/continent lookup CSV from the working directory.
Continents <- read.csv("countries and continents.csv")
# Show the raw table, then its row count.
Continents
nrow(Continents)
[1] 251
# List the column names of the raw Continents table.
names(Continents)
[1] "name" "official_name_en" "official_name_fr"
[4] "ISO3166.1.Alpha.2" "ISO3166.1.Alpha.3" "M49"
[7] "ITU" "MARC" "WMO"
[10] "DS" "Dial" "FIFA"
[13] "FIPS" "GAUL" "IOC"
[16] "ISO4217.currency_alphabetic_code" "ISO4217.currency_country_name" "ISO4217.currency_minor_unit"
[19] "ISO4217.currency_name" "ISO4217.currency_numeric_code" "is_independent"
[22] "Capital" "Continent" "TLD"
[25] "Languages" "Geoname.ID" "EDGAR"
# Preview the first rows of the raw Continents table.
head(Continents)
COVID
# Tidy the raw COVID table: give the first five columns meaningful names,
# drop the first row, keep only those five columns, and convert each column
# to a usable type (character / Date / numeric).
TidyCOVID <-
  COVID %>%
  # rename() takes `new_name = old_name` pairs directly. Wrapping each pair
  # in c("new" = "old") is what raised "All arguments must be named".
  rename(
    country = total.covid.cases.deaths.per.million,
    Code = X,
    Date = X.1,
    DeathsPerMillion = X.2,
    CasesPerMillion = X.3
  ) %>%
  # NOTE(review): the first row presumably holds the file's real header
  # (the columns were auto-named X, X.1, ...) -- confirm against the CSV.
  filter(row_number() > 1) %>%
  select(country, Code, Date, DeathsPerMillion, CasesPerMillion) %>%
  mutate(
    country = as.character(country),
    Code = as.character(Code),
    # Dates are parsed as month-day-year.
    Date = mdy(as.character(Date)),
    # Go through character before converting to numeric: as.integer() on a
    # factor returns the internal level codes rather than the printed values,
    # and on a character column it truncates decimals, so the former
    # `as.integer(x) - 1` did not recover the per-million figures.
    DeathsPerMillion = as.numeric(as.character(DeathsPerMillion)),
    CasesPerMillion = as.numeric(as.character(CasesPerMillion))
  )
Error: All arguments must be named
Run `rlang::last_error()` to see where the error occurred.
# Display the tidied COVID table.
TidyCOVID
Continents
# Build a two-column country -> continent lookup from the raw Continents
# table, with both columns stored as character.
TidyCont <-
  Continents %>%
  # Skip the first two rows.
  # NOTE(review): the reason for dropping them is not visible here -- confirm.
  filter(row_number() > 2) %>%
  # Columns 1 and 23 of Continents are `name` and `Continent`; select them by
  # name and rename `name` to `country` in the same step. The previous
  # rename(c("country" = "name")) form fails with
  # "All arguments must be named" on the dplyr version used in this notebook.
  select(country = name, Continent) %>%
  mutate(
    country = as.character(country),
    Continent = as.character(Continent)
  )
TidyCont
# Attach the CountryData columns to every TidyCOVID row, matching on country
# name; TidyCOVID rows without a match are kept.
DataMix <-
  TidyCOVID %>%
  left_join(CountryData, by = "country")
JOEY: I think this is a good one to start out with, but if there is some way that you could make this more clear (color code, etc.) that would be great. This is a good start to our data.
# Scatter plot of cases per million against date, one point per DataMix row.
ggplot(DataMix, aes(Date, CasesPerMillion)) +
  geom_point()
JOEY: I like this graph because it visualizes one of the factors that contributes to the development classification. Can we add in a color code or something that would represent one more factor, such as different countries or regions of the world? The more that we can do with this one, the better.
# Scatter plot of DeathsPerMillionMarchGrowth against the health column.
# NOTE(review): Growth is not defined in the visible part of this file.
ggplot(Growth, aes(health, DeathsPerMillionMarchGrowth)) +
  geom_point()
JOEY: I also liked this graph, but we need to remove the outlier that skews it and makes the scale too hard to read to see a relationship. It is the one with an x-value of over 10,000.
# Scatter plot of CasesPerMillionMarchGrowth against the airports column.
# NOTE(review): Growth is not defined in the visible part of this file.
ggplot(Growth, aes(airports, CasesPerMillionMarchGrowth)) +
  geom_point()
JOEY: I think one more graph here will be sufficient. Can we do one that compares the relationship between cases per million, deaths per million, and population? Maybe something like a choropleth graph (I think that's what I'm thinking of)?